import requests
from bs4 import BeautifulSoup
import json
import re
import multiprocessing
import xlwt
import time

def request_html(url):
    """Fetch *url* and return the response body as text.

    Parameters
    ----------
    url : str
        Fully qualified URL to request.

    Returns
    -------
    str or None
        Decoded response body on success, ``None`` when the request
        fails (network error, timeout, or non-2xx status).

    Note: the original returned the unbound local ``html`` after an
    exception, raising ``UnboundLocalError`` and masking the real error.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.63 Safari/537.36'}
    try:
        # timeout= keeps a dead/slow server from hanging the crawler forever.
        response = requests.get(url, headers=headers, timeout=10)
        # Treat HTTP error pages (404/500...) as failures instead of
        # handing an error page to the parser.
        response.raise_for_status()
        return response.text
    except requests.RequestException as e:
        print(e)
        return None
def parse_html(html):
    """Extract news rows from a Netease ``data_callback([...])`` JSONP string.

    Parameters
    ----------
    html : str
        Raw JSONP response text wrapping a JSON array of news items.

    Returns
    -------
    list[list[str]]
        One ``[title, docurl, label, source]`` row per well-formed item;
        an empty list when the payload cannot be located or parsed.
    """
    output = []
    try:
        # Strip the JSONP wrapper: the JSON array runs from the first
        # '[' to the ']' immediately before the closing ')'.
        # This MUST be inside the try — index() raises ValueError when
        # the delimiters are missing (the original crashed here, outside
        # its try block).
        start = html.index('[')
        end = html.index('])') + 1
        data = json.loads(html[start:end])
    except (ValueError, AttributeError):
        # ValueError covers both a missing delimiter and invalid JSON;
        # AttributeError covers html=None from a failed request.
        print("当前页面爬取失败")
        return output

    for item in data:
        try:
            output.append([item['title'], item['docurl'],
                           item['label'], item['source']])
        except KeyError:
            # Skip a malformed entry instead of abandoning the whole
            # page (the original bare except discarded all rows and
            # returned None). No sleep here: this loop performs no
            # network requests, so the original 2s-per-item pause only
            # stalled parsing.
            print("当前页面爬取失败")
    return output
def main():
    """Crawl the first page of Netease domestic news and print the rows.

    Fetches the JSONP feed, parses it into ``[title, docurl, label,
    source]`` rows, and prints the result. (Pagination for pages 2+
    uses ``cm_guonei_<page>`` URLs and could be added here later.)
    """
    url = 'https://news.163.com/special/cm_guonei/?callback=data_callback'
    print('开始爬取', url)
    html = request_html(url)
    if html is None:
        # Request failed; nothing to parse.
        return
    result = parse_html(html)
    print(result)

if __name__ == '__main__':
    main()
